####################################################################
# Replication code for
#   Lyall, Shiraito, and Imai, "Coethnic Bias and Wartime Informing"
#
# Code to run Gibbs sampler for simulating the posterior
# distribution of the IRT model parameters
#
# Author: Yuki Shiraito
# Created: March 3, 2015
####################################################################


rm(list = ls())

library(endorse)
library(foreach)
library(doMC)
library(doRNG)

n.chains <- 3

load("data.RData")


### Formulae
formula.indiv <- formula(## ethnicity: indicator of ethno.tribe
                         ~ ethno.tribe +
                         ## socioeconomic variables 
                         mon_income + age + school_years +
                         ## contact variables and their interactions
                         contact_ff + contact_anp + contact_otherethno + 
                         contact_ff : ethno.tribe + contact_anp : ethno.tribe +
                         ## harm/damage variables
                         damagebyff.ProTaliban + damagebyff.Other + damagebyff.Tajik +
                         damagebyanp.ProTaliban + damagebyanp.Other + damagebyanp.Tajik+
                         damagebytaliban.ProTaliban + damagebytaliban.Other + damagebytaliban.Tajik)
formula.village <- formula(## SIGACTS
                           ~ ISAF.sigacts.2 + Insurg.sigacts.2 +
                           ## IMMAP
                           Unknown.immap.2 + ISAF.immap.2 + Insurg.immap.2 +
                           ## aid
                           nsp.0 +
                           ## military installations
                           base.2 +
                           ## geography
                           log.altitude + log.population + percent_pashtun +
                           taliban.control + contested)

### names
names.lambda <- colnames(model.matrix(formula.indiv, data))
names.kappa <- colnames(model.matrix(formula.village, data.village))


### Starting values
start.values <- list()
for (i in 1:n.chains) {
    s.start <- matrix(rnorm(2700 * 3, mean = 0, sd = 1), nrow = 2700, ncol = 3)
    kappa.start <- matrix(rnorm(ncol(model.matrix(formula.village, data.village)) * 2,
                                sd = 1),
                          ncol = 2, nrow = ncol(model.matrix(formula.village, data.village)))
    lambda.start1 <- matrix(rnorm(100 + (ncol(model.matrix(formula.indiv, data)) - 1),
                                  mean = c(model.matrix(formula.village, data.village) %*%
                                      as.matrix(kappa.start[, 1]),
                                      rep(0, (ncol(model.matrix(formula.indiv, data)) - 1))), sd = 1),
                            ncol = 1, nrow = 100 + (ncol(model.matrix(formula.indiv, data)) - 1))
    lambda.start2 <- matrix(rnorm(100 + (ncol(model.matrix(formula.indiv, data)) - 1),
                                  mean = c(model.matrix(formula.village, data.village) %*%
                                      as.matrix(kappa.start[, 2]),
                                      rep(0, (ncol(model.matrix(formula.indiv, data)) - 1))), sd = 1),
                            ncol = 1, nrow = 100 + (ncol(model.matrix(formula.indiv, data)) - 1))
    
    lambda.start <- cbind(lambda.start1, lambda.start2)
    x.start <- rnorm(2700)

    start.values[[i]] <- list(s.start = s.start, kappa.start = kappa.start, lambda.start = lambda.start,
                              x.start = x.start)
}

## Response variables
Y.1 <- list(Q1 = c("call_ff",
              "call_pashtun", "call_tajik"),
            Q2 = c("anon_ff",
              "anon_pashtun", "anon_tajik"),
            Q3 = c("dropin_ff",
              "dropin_pashtun", "dropin_tajik"))
Y.1.1 <- list(Q1 = c("retal_ff", "retal_pashtun", "retal_tajik"))
Y.2 <- list(Q1 = c("publicmeet_ff",
              "publicmeet_pashtun", "publicmeet_tajik"),
            Q2 = c("amnesty_ff",
              "amnesty_pashtun", "amnesty_tajik"),
            Q3 = c("alp_ff",
              "alp_pashtun", "alp_tajik"),
            Q4 = c("cashalp_ff",
              "cashalp_pashtun", "cashalp_tajik"))

## Question names
Q.1 <- c("Guardians", "Anonymity", "Stopping")
Q.1.1 <- "Retaliation"
Q.2 <- c("Meetings", "Amnesty", "Local Police", "Cash")

## Endorsement
group <- c("Pashtun", "Tajik")

## Settings
MCMC <- 50000
burn <- 25000
thin <- 10

registerDoMC(n.chains)
registerDoRNG()
## Fit the model
endorse.out.list <- foreach(i.chain = 1:n.chains) %dorng% {
    set.seed(i)
    endorse.out <- endorse(Y = Y.1,
                           data = data,
                           data.village = data.village,
                           na.strings = 9899,
                           village = "village_id",
                           identical.lambda = TRUE,
                           covariates = TRUE,
                           hierarchical = TRUE,
                           formula.indiv = formula.indiv,
                           formula.village = formula.village,
                           psi2.start = .3,
                           s.start = start.values[[i.chain]]$s.start,
                           lambda.start = start.values[[i.chain]]$lambda.start,
                           kappa.start = start.values[[i.chain]]$kappa.start,
                           x.start = start.values[[i.chain]]$x.start,
                           s0.omega2 = 1,
                           nu0.omega2 = 10,
                           s0.psi2 = 1,
                           nu0.psi2 = 10,
                           s0.rho2 = 1,
                           nu0.rho2 = 10,
                           tau.out = TRUE,
                           s.out = TRUE,
                           x.sd = FALSE,
                           seed.store = TRUE,
                           MCMC = MCMC,
                           burn = burn,
                           thin = thin,
                           mu.beta = 0,
                           precision.beta = 1 / 9,
                           precision.x = 1,
                           precision.theta = 1 / 9,
                           precision.kappa = 1 / 9,
                           precision.delta = 1 / 9,
                           precision.zeta = 1 / 9,
                           verbose = TRUE)
    endorse.out$x.sd <- apply(endorse.out$x, 1, sd)
    return(endorse.out)
}

#### Gelman-Rubin diagnostics
lambda.vars <- c(101:(ncol(endorse.out.list[[1]]$lambda)/2),
                 ((ncol(endorse.out.list[[1]]$lambda)/2) + 101):ncol(endorse.out.list[[1]]$lambda))
diag.psrf <- gelman.diag(mcmc.list(as.mcmc(cbind(endorse.out.list[[1]]$lambda[, lambda.vars],
                                                 endorse.out.list[[1]]$kappa) /
                                           as.double(endorse.out.list[[1]]$x.sd)),
                                   as.mcmc(cbind(endorse.out.list[[2]]$lambda[, lambda.vars], endorse.out.list[[2]]$kappa) /
                                           as.double(endorse.out.list[[2]]$x.sd)),
                                   as.mcmc(cbind(endorse.out.list[[3]]$lambda[, lambda.vars], endorse.out.list[[3]]$kappa) /
                                           as.double(endorse.out.list[[3]]$x.sd))))$psrf[, "Point est."]
more.iter <- !(max(diag.psrf) < 1.3)
save(endorse.out.list, file = "endorse.out.list.RData")

if (more.iter) {
    for (i in 1:25) {
        endorse.out.list <- foreach(i.chain = 1:n.chains) %dorng% {
            
            endorse.out <- endorse(Y = Y.1,
                                   data = data,
                                   data.village = data.village,
                                   na.strings = 9899,
                                   village = "village_id",
                                   identical.lambda = TRUE,
                                   covariates = TRUE,
                                   hierarchical = TRUE,
                                   formula.indiv = formula.indiv,
                                   formula.village = formula.village,
                                   update.start = endorse.out.list[[i.chain]],
                                   s0.omega2 = 1,
                                   nu0.omega2 = 10,
                                   s0.psi2 = 1,
                                   nu0.psi2 = 10,
                                   s0.rho2 = 1,
                                   nu0.rho2 = 10,
                                   tau.out = TRUE,
                                   s.out = TRUE,
                                   x.sd = FALSE,
                                   seed.store = TRUE,
                                   MCMC = MCMC,
                                   burn = burn,
                                   thin = thin,
                                   mu.beta = 0,
                                   precision.beta = 1 / 9,
                                   precision.x = 1,
                                   precision.theta = 1 / 9,
                                   precision.kappa = 1 / 9,
                                   precision.delta = 1 / 9,
                                   precision.zeta = 1 / 9,
                                   update = TRUE,
                                   verbose = TRUE)
            endorse.out$x.sd <- apply(endorse.out$x, 1, sd)
            return(endorse.out)
            
        }

        #### Gelman-Rubin diagnostics
        lambda.vars <- c(101:(ncol(endorse.out.list[[1]]$lambda)/2),
                         ((ncol(endorse.out.list[[1]]$lambda)/2) + 101):ncol(endorse.out.list[[1]]$lambda))
        diag.psrf <- gelman.diag(mcmc.list(as.mcmc(cbind(endorse.out.list[[1]]$lambda[, lambda.vars],
                                                         endorse.out.list[[1]]$kappa) /
                                                   as.double(endorse.out.list[[1]]$x.sd)),
                                           as.mcmc(cbind(endorse.out.list[[2]]$lambda[, lambda.vars], endorse.out.list[[2]]$kappa) /
                                                   as.double(endorse.out.list[[2]]$x.sd)),
                                           as.mcmc(cbind(endorse.out.list[[3]]$lambda[, lambda.vars], endorse.out.list[[3]]$kappa) /
                                                   as.double(endorse.out.list[[3]]$x.sd))))$psrf[, "Point est."]
        more.iter <- !(max(diag.psrf) < 1.3)
        save(endorse.out.list, file = "endorse.out.list.RData")

        if (!more.iter) break
    }
    ## iterations needed to get convergence
    (i + 1) * MCMC


}
